Import the data into a list via a function call to import_csv_data().
# Load the two data collections through the project helper import_csv_data().
# The results are accessed below by dataset name with `$`
# (e.g. data_marketing$olist_closed_deals_dataset).
# NOTE(review): presumably the helper reads every CSV under `path` into a
# named list -- confirm against its definition.
data <- import_csv_data(path = "Olist_data/")
data_marketing <- import_csv_data(path = "Olist_Marketing_data/")
Source https://www.kaggle.com/jungjoonlee/eda-with-ecommerce-marketplace-seller-side
Marketing Qualified Lead (MQL) means a potential reseller/manufacturer who has an interest in selling their products on Olist. After an MQL filled in a form on a landing page to sign up as a seller, a Sales Development Representative (SDR) contacted the MQL and gathered more information about the lead. Then a Sales Representative (SR) consulted the MQL. So the interaction between SDRs/SRs and MQLs can affect the conversion from MQLs to sellers. An MQL who finally signed up as a seller is called a closed deal.
# Summarise the closed-deals table (column types, missing values,
# distributions) with skim().
skim(data_marketing$olist_closed_deals_dataset)
| Name | data_marketing$olist_clos… |
| Number of rows | 842 |
| Number of columns | 14 |
| _______________________ | |
| Column type frequency: | |
| factor | 12 |
| numeric | 2 |
| ________________________ | |
| Group variables | None |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| mql_id | 0 | 1 | FALSE | 842 | 000: 1, 009: 1, 00d: 1, 010: 1 |
| seller_id | 0 | 1 | FALSE | 842 | 000: 1, 012: 1, 013: 1, 01f: 1 |
| sdr_id | 0 | 1 | FALSE | 32 | 4b3: 140, 068: 81, 56b: 74, 9d1: 66 |
| sr_id | 0 | 1 | FALSE | 22 | 4ef: 133, d3d: 82, 656: 74, 85f: 64 |
| won_date | 0 | 1 | FALSE | 824 | 201: 6, 201: 4, 201: 3, 201: 3 |
| business_segment | 0 | 1 | FALSE | 34 | hom: 105, hea: 93, car: 77, hou: 71 |
| lead_type | 0 | 1 | FALSE | 9 | onl: 332, onl: 126, ind: 123, off: 104 |
| lead_behaviour_profile | 0 | 1 | FALSE | 10 | cat: 407, emp: 177, eag: 123, wol: 95 |
| has_company | 0 | 1 | FALSE | 3 | emp: 779, Tru: 58, Fal: 5 |
| has_gtin | 0 | 1 | FALSE | 3 | emp: 778, Tru: 54, Fal: 10 |
| average_stock | 0 | 1 | FALSE | 7 | emp: 776, 5-2: 22, 50-: 15, 1-5: 10 |
| business_type | 0 | 1 | FALSE | 4 | res: 587, man: 242, emp: 10, oth: 3 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| declared_product_catalog_size | 773 | 0.08 | 233.03 | 352.38 | 1 | 30 | 100 | 300 | 2e+03 | ▇▁▁▁▁ |
| declared_monthly_revenue | 0 | 1.00 | 73377.68 | 1744799.18 | 0 | 0 | 0 | 0 | 5e+07 | ▇▁▁▁▁ |
# Interactive, horizontally scrollable preview of the first rows of the
# closed-deals table.
datatable(
  head(data_marketing$olist_closed_deals_dataset),
  rownames = FALSE,
  options = list(scrollX = TRUE)
)
# Summarise the marketing-qualified-leads table with skim().
skim(data_marketing$olist_marketing_qualified_leads_dataset)
| Name | data_marketing$olist_mark… |
| Number of rows | 8000 |
| Number of columns | 4 |
| _______________________ | |
| Column type frequency: | |
| factor | 4 |
| ________________________ | |
| Group variables | None |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| mql_id | 0 | 1 | FALSE | 8000 | 000: 1, 000: 1, 001: 1, 002: 1 |
| first_contact_date | 0 | 1 | FALSE | 336 | 201: 93, 201: 76, 201: 75, 201: 71 |
| landing_page_id | 0 | 1 | FALSE | 495 | b76: 912, 22c: 883, 583: 495, 887: 445 |
| origin | 0 | 1 | FALSE | 11 | org: 2296, pai: 1586, soc: 1350, unk: 1099 |
# Interactive, horizontally scrollable preview of the first rows of the
# marketing-qualified-leads table.
datatable(
  head(data_marketing$olist_marketing_qualified_leads_dataset),
  rownames = FALSE,
  options = list(scrollX = TRUE)
)
# Attach closed-deal information to every marketing qualified lead.
# right_join keeps all rows of the leads table; leads without a closed deal
# get NA in the closed-deal columns (e.g. seller_id).
df_deals <- right_join(
  data_marketing$olist_closed_deals_dataset,
  data_marketing$olist_marketing_qualified_leads_dataset,
  by = "mql_id"
)
# Create a monthly data frame of leads and closed deals per origin, keyed by
# the month ("YYYYMM") of the first contact date.
df_deals_monthly <- df_deals %>%
  mutate(
    first_contact_month = format(as_date(first_contact_date), "%Y%m")
  ) %>%
  mutate(
    # A lead counts as closed when the join found a seller for it.
    deal_closed = if_else(!is.na(seller_id), 1, 0),
    # Empty origin strings are folded into the "unknown" category.
    origin = if_else(origin != "", as.character(origin), "unknown")
  ) %>%
  group_by(origin, first_contact_month) %>%
  summarize(
    leads = n(),
    closed_deals = sum(deal_closed)
  ) %>%
  ungroup()
# Line chart: total number of leads per first-contact month (all origins).
plot_leads <- df_deals_monthly %>%
  group_by(first_contact_month) %>%
  summarize(leads = sum(leads)) %>%
  ggplot(mapping = aes(x = first_contact_month, y = leads, group = 1)) +
  geom_line() +
  geom_point() +
  labs(x = "", y = "Number of Leads", colour = "origin") +
  theme_bw()
# Render interactively; the static plot is kept below for reference.
ggplotly(plot_leads)
# plot_leads
# Line chart: number of leads per first-contact month, one line per origin.
#
# The data stay in long format and `origin` is mapped to colour, so the plot
# adapts to whatever origins are present instead of relying on one hard-coded
# layer per origin (which fails as soon as an origin is missing and silently
# ignores new ones).
plot_leads_origin <- df_deals_monthly %>%
  select(-closed_deals) %>%
  # Months without leads for an origin are shown as 0 rather than a gap.
  complete(origin, first_contact_month, fill = list(leads = 0)) %>%
  ggplot(aes(
    x = first_contact_month,
    y = leads,
    colour = origin,
    group = origin
  )) +
  geom_line() +
  geom_point() +
  theme_bw() +
  labs(x = "", y = "Number of Leads", colour = "origin")
ggplotly(plot_leads_origin)
# plot_leads_origin
# Bar chart: total number of leads per origin, sorted in descending order,
# with the count printed above each bar.
plot_bar_leads_origin <- df_deals_monthly %>%
  group_by(origin) %>%
  summarize(leads = sum(leads)) %>%
  ggplot(aes(x = reorder(origin, -leads), y = leads, group = 1)) +
  geom_col(aes(fill = origin)) +
  # Label straight from the data column instead of the deprecated `..y..`.
  geom_text(aes(label = leads), vjust = -1) +
  theme_bw() +
  # The bars use the fill aesthetic, so the legend title belongs to `fill`.
  labs(x = "", y = "Number of Leads", fill = "origin") +
  # Fixed upper limit leaves room for the labels; bars taller than 2350
  # would be dropped by ggplot2, so revisit if the data change.
  scale_y_continuous(limits = c(0, 2350))
ggplotly(plot_bar_leads_origin)
# plot_bar_leads_origin
# Bar chart: share of all leads per origin, sorted in descending order,
# with the percentage printed above each bar.
plot_bar_leads_origin_per <- df_deals_monthly %>%
  group_by(origin) %>%
  summarize(leads = sum(leads)) %>%
  # Share of each origin in the total number of leads.
  mutate(share = leads / sum(leads)) %>%
  ggplot(aes(x = reorder(origin, -leads), y = share, group = 1)) +
  geom_col(aes(fill = origin)) +
  # Label straight from the data column instead of the deprecated `..y..`.
  geom_text(aes(label = paste0(round(share * 100, 1), "%")), vjust = -1) +
  theme_bw() +
  # The axis shows a percentage share, not a count.
  labs(x = "", y = "Share of Leads", fill = "origin") +
  # Fixed upper limit leaves room for the labels; bars above 30% would be
  # dropped by ggplot2, so revisit if the data change.
  scale_y_continuous(labels = scales::percent, limits = c(0, 0.3))
ggplotly(plot_bar_leads_origin_per)
# plot_bar_leads_origin_per
# Line chart: total number of closed deals per first-contact month.
# NOTE: this reuses (and overwrites) the name `plot_leads` from the leads
# chart; the name is kept so existing references keep working.
plot_leads <- df_deals_monthly %>%
  group_by(first_contact_month) %>%
  # Name the column after what it holds so the interactive tooltip reads
  # "closed_deals" rather than "leads".
  summarize(closed_deals = sum(closed_deals)) %>%
  ggplot(aes(x = first_contact_month, y = closed_deals, group = 1)) +
  geom_line() +
  geom_point() +
  theme_bw() +
  labs(x = "", y = "Number of Closed Deals")
ggplotly(plot_leads)
# plot_leads
# Line chart: number of closed deals per first-contact month, one line per
# origin.
# NOTE: this reuses (and overwrites) the name `plot_leads_origin` from the
# leads chart; the name is kept so existing references keep working.
#
# The data stay in long format and `origin` is mapped to colour, so the plot
# adapts to whatever origins are present instead of relying on one hard-coded
# layer per origin (which fails as soon as an origin is missing and silently
# ignores new ones).
plot_leads_origin <- df_deals_monthly %>%
  select(-leads) %>%
  # Months without closed deals for an origin are shown as 0, not a gap.
  complete(origin, first_contact_month, fill = list(closed_deals = 0)) %>%
  ggplot(aes(
    x = first_contact_month,
    y = closed_deals,
    colour = origin,
    group = origin
  )) +
  geom_line() +
  geom_point() +
  theme_bw() +
  labs(x = "", y = "Number of Closed Deals", colour = "origin")
ggplotly(plot_leads_origin)
# plot_leads_origin
# Bar chart: total number of closed deals per origin, sorted in descending
# order, with the count printed above each bar.
# NOTE: this reuses (and overwrites) the name `plot_bar_leads_origin` from the
# leads chart; the name is kept so existing references keep working.
plot_bar_leads_origin <- df_deals_monthly %>%
  group_by(origin) %>%
  # Name the column after what it holds (closed deals, not leads).
  summarize(closed_deals = sum(closed_deals)) %>%
  ggplot(aes(
    x = reorder(origin, -closed_deals),
    y = closed_deals,
    group = 1
  )) +
  geom_col(aes(fill = origin)) +
  # Label straight from the data column instead of the deprecated `..y..`.
  geom_text(aes(label = closed_deals), vjust = -1) +
  theme_bw() +
  # The bars use the fill aesthetic, so the legend title belongs to `fill`.
  labs(x = "", y = "Number of Closed Deals", fill = "origin") +
  # Fixed upper limit leaves room for the labels; bars taller than 300
  # would be dropped by ggplot2, so revisit if the data change.
  scale_y_continuous(limits = c(0, 300))
ggplotly(plot_bar_leads_origin)
# plot_bar_leads_origin
# Bar chart: share of all closed deals per origin, sorted in descending
# order, with the percentage printed above each bar.
# NOTE: this reuses (and overwrites) the name `plot_bar_leads_origin_per` from
# the leads chart; the name is kept so existing references keep working.
plot_bar_leads_origin_per <- df_deals_monthly %>%
  group_by(origin) %>%
  summarize(closed_deals = sum(closed_deals)) %>%
  # Share of each origin in the total number of closed deals.
  mutate(share = closed_deals / sum(closed_deals)) %>%
  ggplot(aes(x = reorder(origin, -closed_deals), y = share, group = 1)) +
  geom_col(aes(fill = origin)) +
  # Label straight from the data column instead of the deprecated `..y..`.
  geom_text(aes(label = paste0(round(share * 100, 1), "%")), vjust = -1) +
  theme_bw() +
  # The axis shows a percentage share, not a count.
  labs(x = "", y = "Share of Closed Deals", fill = "origin") +
  # Fixed upper limit leaves room for the labels; bars above 35% would be
  # dropped by ggplot2, so revisit if the data change.
  scale_y_continuous(labels = scales::percent, limits = c(0, 0.35))
ggplotly(plot_bar_leads_origin_per)
# plot_bar_leads_origin_per
# Line chart: monthly conversion rate (closed deals / leads), by month of
# first contact, across all origins.
plot_conversion <- df_deals_monthly %>%
  group_by(first_contact_month) %>%
  summarize(
    leads = sum(leads),
    closed_deals = sum(closed_deals)
  ) %>%
  mutate(conversion_rate = closed_deals / leads) %>%
  ggplot(mapping = aes(
    x = first_contact_month,
    y = conversion_rate,
    group = 1
  )) +
  geom_line() +
  geom_point() +
  scale_y_continuous(labels = scales::percent) +
  labs(x = "", y = "Conversion Rate", colour = "origin") +
  theme_bw()
# Render interactively; the static plot is kept below for reference.
ggplotly(plot_conversion)
# plot_conversion
# Bar chart: conversion rate (closed deals / leads) per origin for leads
# whose first contact was in 2017, sorted in descending order.
plot_bar_conversion_origin_2017 <- df_deals_monthly %>%
  # first_contact_month is a "YYYYMM" string; keep the 2017 months.
  filter(substr(first_contact_month, 1, 4) == "2017") %>%
  group_by(origin) %>%
  summarize(
    leads = sum(leads),
    closed_deals = sum(closed_deals)
  ) %>%
  mutate(conversion_rate = closed_deals / leads) %>%
  ggplot(aes(
    x = reorder(origin, -conversion_rate),
    y = conversion_rate,
    group = 1
  )) +
  geom_col(aes(fill = origin)) +
  # Label straight from the data column instead of the deprecated `..y..`.
  geom_text(
    aes(label = paste0(round(conversion_rate * 100, 1), "%")),
    vjust = -1
  ) +
  theme_bw() +
  # The bars use the fill aesthetic, so the legend title belongs to `fill`.
  labs(x = "", y = "Conversion Rate", fill = "origin") +
  # Fixed upper limit leaves room for the labels; bars above 20% would be
  # dropped by ggplot2, so revisit if the data change.
  scale_y_continuous(labels = scales::percent, limits = c(0, 0.20))
ggplotly(plot_bar_conversion_origin_2017)
# plot_bar_conversion_origin_2017
# Bar chart: conversion rate (closed deals / leads) per origin for leads
# whose first contact was in 2018, sorted in descending order.
plot_bar_conversion_origin_2018 <- df_deals_monthly %>%
  # first_contact_month is a "YYYYMM" string; keep the 2018 months.
  filter(substr(first_contact_month, 1, 4) == "2018") %>%
  group_by(origin) %>%
  summarize(
    leads = sum(leads),
    closed_deals = sum(closed_deals)
  ) %>%
  mutate(conversion_rate = closed_deals / leads) %>%
  ggplot(aes(
    x = reorder(origin, -conversion_rate),
    y = conversion_rate,
    group = 1
  )) +
  geom_col(aes(fill = origin)) +
  # Label straight from the data column instead of the deprecated `..y..`.
  geom_text(
    aes(label = paste0(round(conversion_rate * 100, 1), "%")),
    vjust = -1
  ) +
  theme_bw() +
  # The bars use the fill aesthetic, so the legend title belongs to `fill`.
  labs(x = "", y = "Conversion Rate", fill = "origin") +
  # Fixed upper limit leaves room for the labels; bars above 30% would be
  # dropped by ggplot2, so revisit if the data change.
  scale_y_continuous(labels = scales::percent, limits = c(0, 0.30))
ggplotly(plot_bar_conversion_origin_2018)
# plot_bar_conversion_origin_2018
# Add a numeric 0/1 flag for closed deals (numeric rather than logical so it
# can be summed below).
df_deals <- df_deals %>%
  mutate(deal_closed = if_else(is.na(seller_id), 0, 1))

# One row per seller that closed a deal, with order volume and revenue taken
# from the order-items table.
#   closed_deals: sum of the 0/1 flag over the joined rows, i.e. the number of
#                 matching order-item rows (1 for a seller with no match).
#                 NOTE(review): presumably one row per sold item -- confirm
#                 against the order-items schema.
#   revenue:      sum of `price`; NA for sellers without matching order items.
df_seller <- df_deals %>%
  # Drop leads without a seller before joining, so the join only has to
  # process closed deals; the result is the same as filtering afterwards.
  filter(!is.na(seller_id)) %>%
  left_join(data$olist_order_items_dataset, by = "seller_id") %>%
  group_by(seller_id, business_segment, lead_type, business_type, origin) %>%
  summarize(
    closed_deals = sum(deal_closed),
    revenue = sum(price)
  ) %>%
  ungroup()
# Summarise the per-seller table (column types, missing values,
# distributions) with skim().
skim(df_seller)
| Name | df_seller |
| Number of rows | 842 |
| Number of columns | 7 |
| _______________________ | |
| Column type frequency: | |
| character | 1 |
| factor | 4 |
| numeric | 2 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| seller_id | 0 | 1 | 32 | 32 | 0 | 842 | 0 |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| business_segment | 0 | 1 | FALSE | 34 | hom: 105, hea: 93, car: 77, hou: 71 |
| lead_type | 0 | 1 | FALSE | 9 | onl: 332, onl: 126, ind: 123, off: 104 |
| business_type | 0 | 1 | FALSE | 4 | res: 587, man: 242, emp: 10, oth: 3 |
| origin | 0 | 1 | FALSE | 11 | org: 271, pai: 195, unk: 179, soc: 75 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| closed_deals | 0 | 1.00 | 6.54 | 25.77 | 1.0 | 1.00 | 1.0 | 4 | 578 | ▇▁▁▁▁ |
| revenue | 462 | 0.45 | 1781.19 | 6800.94 | 18.9 | 179.65 | 547.4 | 1286 | 113629 | ▇▁▁▁▁ |
For first analysis: drop all sellers with NA Revenue and transform empty observations to “unknown”.
# Modelling data set: sellers with known revenue only. Empty category values
# are recoded to "unknown" and a revenue-per-order ratio is added.
df_seller_man <- df_seller %>%
  filter(!is.na(revenue)) %>%
  mutate(
    business_segment = if_else(
      business_segment != "", as.character(business_segment), "unknown"
    ),
    lead_type = if_else(
      lead_type != "", as.character(lead_type), "unknown"
    ),
    business_type = if_else(
      business_type != "", as.character(business_type), "unknown"
    ),
    origin = if_else(
      origin != "", as.character(origin), "unknown"
    )
  ) %>%
  mutate(Revenue_per_Order = revenue / closed_deals) %>%
  # Turn every character column (including seller_id) back into a factor.
  mutate_if(is.character, as.factor)
# Summarise the cleaned per-seller table (column types, missing values,
# distributions) with skim().
skim(df_seller_man)
| Name | df_seller_man |
| Number of rows | 380 |
| Number of columns | 8 |
| _______________________ | |
| Column type frequency: | |
| factor | 5 |
| numeric | 3 |
| ________________________ | |
| Group variables | None |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| seller_id | 0 | 1 | FALSE | 380 | 012: 1, 01f: 1, 02f: 1, 03a: 1 |
| business_segment | 0 | 1 | FALSE | 29 | hea: 45, hom: 44, hou: 44, con: 32 |
| lead_type | 0 | 1 | FALSE | 8 | onl: 172, onl: 79, ind: 41, off: 30 |
| business_type | 0 | 1 | FALSE | 3 | res: 287, man: 90, unk: 3 |
| origin | 0 | 1 | FALSE | 9 | org: 113, pai: 101, unk: 85, dir: 31 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| closed_deals | 0 | 1 | 13.27 | 37.30 | 1.00 | 2.00 | 5.00 | 12.00 | 578 | ▇▁▁▁▁ |
| revenue | 0 | 1 | 1781.19 | 6800.94 | 18.90 | 179.65 | 547.40 | 1286.00 | 113629 | ▇▁▁▁▁ |
| Revenue_per_Order | 0 | 1 | 169.00 | 286.16 | 9.99 | 49.94 | 87.39 | 160.66 | 2749 | ▇▁▁▁▁ |
# Revenue and order volume aggregated per business segment, sorted by
# revenue per order (highest first).
df_business_segment <- df_seller_man %>%
  group_by(business_segment) %>%
  summarize(
    revenue = sum(revenue),
    closed_deals = sum(closed_deals)
  ) %>%
  mutate(
    Revenue_per_Order = revenue / closed_deals,
    # Six-character abbreviation of the segment name, used as plot label.
    segment_small = substr(business_segment, 1, 6)
  ) %>%
  ungroup() %>%
  arrange(desc(Revenue_per_Order))
# Interactive, horizontally scrollable table of the per-segment aggregates.
datatable(
  df_business_segment,
  rownames = FALSE,
  options = list(scrollX = TRUE)
)

# Revenue per order by business segment, highest first.
df_business_segment %>%
  ggplot(aes(
    x = reorder(segment_small, -Revenue_per_Order),
    y = Revenue_per_Order
  )) +
  geom_col() +
  labs(x = "Business segment", y = "Revenue per Order") +
  theme_bw()

# Number of orders by business segment, highest first.
df_business_segment %>%
  ggplot(aes(
    x = reorder(segment_small, -closed_deals),
    y = closed_deals
  )) +
  geom_col() +
  labs(x = "Business segment", y = "Number of Orders") +
  theme_bw()

# Order volume against revenue per order; each segment is drawn as its
# abbreviated name.
df_business_segment %>%
  ggplot(aes(x = closed_deals, y = Revenue_per_Order)) +
  geom_text(aes(label = segment_small)) +
  labs(x = "Number of Orders", y = "Revenue per Order") +
  theme_bw()
# Revenue and order volume aggregated per lead type, sorted by revenue per
# order (highest first).
df_lead_type <- df_seller_man %>%
  group_by(lead_type) %>%
  summarize(
    revenue = sum(revenue),
    closed_deals = sum(closed_deals)
  ) %>%
  mutate(
    Revenue_per_Order = revenue / closed_deals,
    # Five-character abbreviation of the lead type.
    lead_small = substr(lead_type, 1, 5)
  ) %>%
  ungroup() %>%
  arrange(desc(Revenue_per_Order))
# Interactive, horizontally scrollable table of the per-lead-type aggregates.
datatable(
  df_lead_type,
  rownames = FALSE,
  options = list(scrollX = TRUE)
)

# Total revenue by lead type, highest first.
df_lead_type %>%
  ggplot(aes(x = reorder(lead_type, -revenue), y = revenue)) +
  geom_col() +
  labs(x = "Lead Type", y = "Revenue") +
  theme_bw()

# Revenue per order by lead type, highest first.
df_lead_type %>%
  ggplot(aes(
    x = reorder(lead_type, -Revenue_per_Order),
    y = Revenue_per_Order
  )) +
  geom_col() +
  labs(x = "Lead Type", y = "Revenue per Order") +
  theme_bw()

# Number of orders by lead type, highest first.
df_lead_type %>%
  ggplot(aes(x = reorder(lead_type, -closed_deals), y = closed_deals)) +
  geom_col() +
  labs(x = "Lead Type", y = "Number of Orders") +
  theme_bw()

# Order volume against revenue per order; each lead type is drawn as its
# name.
df_lead_type %>%
  ggplot(aes(x = closed_deals, y = Revenue_per_Order)) +
  geom_text(aes(label = lead_type)) +
  labs(x = "Number of Orders", y = "Revenue per Order") +
  theme_bw()
# Relevel business segment for better comparison: the second factor level
# becomes the reference category of the regression.
# NOTE(review): `ref = 2` is positional and depends on the current level
# order -- confirm which segment this selects before changing the data.
df_seller_man <- df_seller_man %>%
  mutate(business_segment = relevel(business_segment, ref = 2))

# Linear model: number of orders per seller explained by the seller's
# categorical attributes.
lm_seller_deals <- lm(
  formula = closed_deals ~ business_segment + lead_type + origin +
    business_type,
  data = df_seller_man
)
summary(lm_seller_deals)
##
## Call:
## lm(formula = closed_deals ~ business_segment + lead_type + origin +
## business_type, data = df_seller_man)
##
## Residuals:
## Min 1Q Median 3Q Max
## -183.61 -9.44 -3.70 3.89 364.66
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) -6.9247 10.6753 -0.649
## business_segmentair_conditioning -4.8921 25.2280 -0.194
## business_segmentbaby -2.8331 15.4100 -0.184
## business_segmentbags_backpacks 6.9612 11.1987 0.622
## business_segmentbed_bath_table 15.5402 12.7164 1.222
## business_segmentbooks 16.9975 18.4511 0.921
## business_segmentcar_accessories -3.1090 8.8908 -0.350
## business_segmentcomputers 2.8141 12.1129 0.232
## business_segmentconstruction_tools_house_garden 2.4132 8.7559 0.276
## business_segmentfashion_accessories 2.0389 15.3596 0.133
## business_segmentfood_drink 3.5310 13.2767 0.266
## business_segmentfood_supplement 7.8826 13.9343 0.566
## business_segmentgames_consoles -5.8770 35.0201 -0.168
## business_segmentgifts 3.0202 21.0013 0.144
## business_segmenthandcrafted 6.9934 22.5303 0.310
## business_segmenthealth_beauty 13.1238 8.2159 1.597
## business_segmenthome_appliances 21.0720 16.6052 1.269
## business_segmenthome_decor 3.3444 8.2296 0.406
## business_segmenthome_office_furniture 31.2110 21.2495 1.469
## business_segmenthousehold_utilities 7.9587 8.1576 0.976
## business_segmentmusic_instruments -4.4558 18.3112 -0.243
## business_segmentparty 19.5763 35.9591 0.544
## business_segmentpet 10.7230 10.7250 1.000
## business_segmentphone_mobile 10.8452 17.5510 0.618
## business_segmentsmall_appliances 0.2652 15.1925 0.017
## business_segmentsports_leisure 4.5255 11.2215 0.403
## business_segmentstationery 3.3163 14.6162 0.227
## business_segmenttoys 7.1314 12.5913 0.566
## business_segmentwatches 192.9446 20.9722 9.200
## lead_typeoffline -12.3781 9.2437 -1.339
## lead_typeonline_beginner 1.6527 9.6251 0.172
## lead_typeonline_big 14.6871 7.0341 2.088
## lead_typeonline_medium 4.1717 6.2856 0.664
## lead_typeonline_small 8.8368 8.8846 0.995
## lead_typeonline_top 2.5438 15.4583 0.165
## lead_typeunknown 9.9978 21.6836 0.461
## origindisplay -1.5010 25.4692 -0.059
## originemail -9.4293 16.4924 -0.572
## originorganic_search 3.9567 7.1893 0.550
## originother 32.0729 25.3783 1.264
## originpaid_search 2.7265 7.2631 0.375
## originreferral 1.5886 13.6553 0.116
## originsocial 4.5100 9.1208 0.494
## originunknown 6.9417 7.4945 0.926
## business_typereseller 5.6883 4.6977 1.211
## business_typeunknown 10.3336 24.2535 0.426
## Pr(>|t|)
## (Intercept) 0.5170
## business_segmentair_conditioning 0.8464
## business_segmentbaby 0.8542
## business_segmentbags_backpacks 0.5346
## business_segmentbed_bath_table 0.2225
## business_segmentbooks 0.3576
## business_segmentcar_accessories 0.7268
## business_segmentcomputers 0.8164
## business_segmentconstruction_tools_house_garden 0.7830
## business_segmentfashion_accessories 0.8945
## business_segmentfood_drink 0.7904
## business_segmentfood_supplement 0.5720
## business_segmentgames_consoles 0.8668
## business_segmentgifts 0.8857
## business_segmenthandcrafted 0.7564
## business_segmenthealth_beauty 0.1111
## business_segmenthome_appliances 0.2053
## business_segmenthome_decor 0.6847
## business_segmenthome_office_furniture 0.1428
## business_segmenthousehold_utilities 0.3300
## business_segmentmusic_instruments 0.8079
## business_segmentparty 0.5865
## business_segmentpet 0.3181
## business_segmentphone_mobile 0.5370
## business_segmentsmall_appliances 0.9861
## business_segmentsports_leisure 0.6870
## business_segmentstationery 0.8206
## business_segmenttoys 0.5715
## business_segmentwatches <2e-16 ***
## lead_typeoffline 0.1815
## lead_typeonline_beginner 0.8638
## lead_typeonline_big 0.0376 *
## lead_typeonline_medium 0.5073
## lead_typeonline_small 0.3206
## lead_typeonline_top 0.8694
## lead_typeunknown 0.6450
## origindisplay 0.9530
## originemail 0.5679
## originorganic_search 0.5824
## originother 0.2072
## originpaid_search 0.7076
## originreferral 0.9075
## originsocial 0.6213
## originunknown 0.3550
## business_typereseller 0.2268
## business_typeunknown 0.6703
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 34.24 on 334 degrees of freedom
## Multiple R-squared: 0.2572, Adjusted R-squared: 0.1571
## F-statistic: 2.57 on 45 and 334 DF, p-value: 9.235e-07
# Linear model: revenue per order of a seller explained by the seller's
# categorical attributes.
lm_seller_revenue <- lm(
  formula = Revenue_per_Order ~ business_segment + lead_type + origin +
    business_type,
  data = df_seller_man
)
summary(lm_seller_revenue)
##
## Call:
## lm(formula = Revenue_per_Order ~ business_segment + lead_type +
## origin + business_type, data = df_seller_man)
##
## Residuals:
## Min 1Q Median 3Q Max
## -951.67 -109.71 -45.14 39.05 2362.85
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 120.51 86.13 1.399
## business_segmentair_conditioning 933.63 203.54 4.587
## business_segmentbaby 51.92 124.33 0.418
## business_segmentbags_backpacks -114.59 90.35 -1.268
## business_segmentbed_bath_table -159.26 102.60 -1.552
## business_segmentbooks -161.11 148.86 -1.082
## business_segmentcar_accessories 77.48 71.73 1.080
## business_segmentcomputers -155.06 97.73 -1.587
## business_segmentconstruction_tools_house_garden 41.77 70.64 0.591
## business_segmentfashion_accessories -128.99 123.92 -1.041
## business_segmentfood_drink -157.02 107.12 -1.466
## business_segmentfood_supplement -69.80 112.42 -0.621
## business_segmentgames_consoles 46.97 282.54 0.166
## business_segmentgifts -33.16 169.44 -0.196
## business_segmenthandcrafted -206.50 181.77 -1.136
## business_segmenthealth_beauty -145.55 66.29 -2.196
## business_segmenthome_appliances -29.26 133.97 -0.218
## business_segmenthome_decor -96.08 66.40 -1.447
## business_segmenthome_office_furniture -132.93 171.44 -0.775
## business_segmenthousehold_utilities -66.24 65.82 -1.006
## business_segmentmusic_instruments 121.77 147.73 0.824
## business_segmentparty -238.59 290.12 -0.822
## business_segmentpet -103.45 86.53 -1.196
## business_segmentphone_mobile -163.34 141.60 -1.154
## business_segmentsmall_appliances 71.50 122.57 0.583
## business_segmentsports_leisure -66.21 90.54 -0.731
## business_segmentstationery -92.60 117.92 -0.785
## business_segmenttoys -131.81 101.59 -1.298
## business_segmentwatches -60.42 169.20 -0.357
## lead_typeoffline 163.53 74.58 2.193
## lead_typeonline_beginner 140.91 77.66 1.815
## lead_typeonline_big 73.37 56.75 1.293
## lead_typeonline_medium 36.94 50.71 0.728
## lead_typeonline_small 31.58 71.68 0.441
## lead_typeonline_top 101.03 124.72 0.810
## lead_typeunknown 354.70 174.94 2.028
## origindisplay -98.91 205.49 -0.481
## originemail 344.62 133.06 2.590
## originorganic_search 62.01 58.00 1.069
## originother 82.27 204.75 0.402
## originpaid_search 84.20 58.60 1.437
## originreferral 128.47 110.17 1.166
## originsocial 25.50 73.59 0.347
## originunknown 34.33 60.47 0.568
## business_typereseller -14.76 37.90 -0.389
## business_typeunknown 31.88 195.68 0.163
## Pr(>|t|)
## (Intercept) 0.1627
## business_segmentair_conditioning 6.37e-06 ***
## business_segmentbaby 0.6765
## business_segmentbags_backpacks 0.2056
## business_segmentbed_bath_table 0.1215
## business_segmentbooks 0.2799
## business_segmentcar_accessories 0.2809
## business_segmentcomputers 0.1135
## business_segmentconstruction_tools_house_garden 0.5547
## business_segmentfashion_accessories 0.2987
## business_segmentfood_drink 0.1436
## business_segmentfood_supplement 0.5351
## business_segmentgames_consoles 0.8681
## business_segmentgifts 0.8450
## business_segmenthandcrafted 0.2568
## business_segmenthealth_beauty 0.0288 *
## business_segmenthome_appliances 0.8272
## business_segmenthome_decor 0.1488
## business_segmenthome_office_furniture 0.4387
## business_segmenthousehold_utilities 0.3149
## business_segmentmusic_instruments 0.4104
## business_segmentparty 0.4114
## business_segmentpet 0.2327
## business_segmentphone_mobile 0.2495
## business_segmentsmall_appliances 0.5601
## business_segmentsports_leisure 0.4651
## business_segmentstationery 0.4329
## business_segmenttoys 0.1954
## business_segmentwatches 0.7212
## lead_typeoffline 0.0290 *
## lead_typeonline_beginner 0.0705 .
## lead_typeonline_big 0.1969
## lead_typeonline_medium 0.4669
## lead_typeonline_small 0.6598
## lead_typeonline_top 0.4185
## lead_typeunknown 0.0434 *
## origindisplay 0.6306
## originemail 0.0100 *
## originorganic_search 0.2858
## originother 0.6881
## originpaid_search 0.1517
## originreferral 0.2444
## originsocial 0.7292
## originunknown 0.5705
## business_typereseller 0.6972
## business_typeunknown 0.8707
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 276.3 on 334 degrees of freedom
## Multiple R-squared: 0.1786, Adjusted R-squared: 0.06795
## F-statistic: 1.614 on 45 and 334 DF, p-value: 0.01027